#### "Reanalyzing the Link between Democracy and Economic Development" ####
# journal: International Area Studies Review
# authors: "Pelke, Lars"
# date: 2023-07-24
# written under "R version 4.1.2 (2021-11-01)"

#### Preliminaries ####

library(plm)
library(MASS)
library(multiwayvcov)
library(ggeffects)
library(PanelMatch)
library(tidyverse)
library(ggpubr)

#run code 01_data_wrangling and before using this code

# set your working directory 
# setwd()


# clear workspace
rm(list=ls())

#### Import Data ####

# Load VDem data
vdem <- readRDS("data/vdem_data.rds") 

vdem <- vdem %>%
  filter(year >=1896)

#### Difference in Difference Design #### compare Imai et al. 2020, package: https://github.com/insongkim/PanelMatch

vdem$year <- as.integer(vdem$year)

vdem <- distinct(vdem, country_id, year, .keep_all= TRUE) # Distinct observations

#### Define outcome variable ####

vdem$y <- vdem$gdppc_ppp_bcbt_mean_log*100
summary(vdem$y)

##### Democratization and Economic Development ######

#### Panel Match Models ####

vdem <- as.data.frame(vdem)
class(vdem)


## Before Refinement ##
PM.results.none <- PanelMatch(lag = 4, time.id = "year", unit.id = "country_id", 
                              treatment = "dem", refinement.method = "none", 
                              data = vdem, match.missing = FALSE, 
                              covs.formula = ~ I(lag(y, 1:4)), 
                              size.match = 10, qoi = "att" ,outcome.var = "y",
                              lead = 0:20, forbid.treatment.reversal = FALSE, 
                              use.diagonal.variance.matrix = TRUE)

## Mahanlanobis Distance Matching ##
PM.results_mahalanobis <- PanelMatch(lag = 4, time.id = "year", unit.id = "country_id", 
                                     treatment = "dem", refinement.method = "mahalanobis", 
                                     data = vdem, match.missing = TRUE, 
                                     covs.formula = ~ I(lag(v2x_polyarchy, 1:4)) + I(lag(Maddison2020_pop_mean_log, 1:4)) +
                                       I(lag(v2svstterr, 1:4)) + I(lag(v2clrspct, 1:4)) +  
                                       I(lag(gdppc_ppp_bcbt_growth_rate_5avg, 1:4)) +
                                       I(lag(y, 1:4)), 
                                     size.match = 10, qoi = "att" ,outcome.var = "y",
                                     lead = 0:20, forbid.treatment.reversal = FALSE, 
                                     use.diagonal.variance.matrix = TRUE)


## Propensity Score Matching ##
PM.results_psmatch <- PanelMatch(lag = 4, time.id = "year", unit.id = "country_id", 
                                 treatment = "dem", refinement.method = "ps.match", 
                                 data = vdem, match.missing = TRUE, 
                                 covs.formula = ~ I(lag(v2x_polyarchy, 1:4)) + I(lag(Maddison2020_pop_mean_log, 1:4)) +
                                   I(lag(v2svstterr, 1:4)) + I(lag(v2clrspct, 1:4)) + 
                                   I(lag(gdppc_ppp_bcbt_growth_rate_5avg, 1:4)) +
                                   I(lag(y, 1:4)), 
                                 size.match = 10, qoi = "att" ,outcome.var = "y",
                                 lead = 0:20, forbid.treatment.reversal = FALSE, 
                                 use.diagonal.variance.matrix = TRUE)

## Propensity Score Weighting ##
PM.results_psweight <- PanelMatch(lag = 4, time.id = "year", unit.id = "country_id", 
                                  treatment = "dem", refinement.method = "ps.weight", 
                                  data = vdem, match.missing = TRUE, 
                                  covs.formula = ~ I(lag(v2x_polyarchy, 1:4)) + I(lag(Maddison2020_pop_mean_log, 1:4)) +
                                    I(lag(v2svstterr, 1:4)) + I(lag(v2clrspct, 1:4)) + 
                                    I(lag(gdppc_ppp_bcbt_growth_rate_5avg, 1:4)) +
                                    I(lag(y, 1:4)), 
                                  size.match = 10, qoi = "att" ,outcome.var = "y",
                                  lead = 0:20, forbid.treatment.reversal = FALSE, 
                                  use.diagonal.variance.matrix = TRUE)

## CBPS Weighting ##
PM.results_cbpsweight <- PanelMatch(lag = 4, time.id = "year", unit.id = "country_id", 
                                  treatment = "dem", refinement.method = "CBPS.weight", 
                                  data = vdem, match.missing = TRUE, 
                                  covs.formula = ~ I(lag(v2x_polyarchy, 1:4)) + I(lag(Maddison2020_pop_mean_log, 1:4)) +
                                    I(lag(v2svstterr, 1:4)) + I(lag(v2clrspct, 1:4)) + 
                                    I(lag(gdppc_ppp_bcbt_growth_rate_5avg, 1:4)) +
                                    I(lag(y, 1:4)), 
                                  size.match = 10, qoi = "att" ,outcome.var = "y",
                                  lead = 0:20, forbid.treatment.reversal = FALSE, 
                                  use.diagonal.variance.matrix = TRUE)

## CBPS Matching ##
PM.results_cbpsmatch <- PanelMatch(lag = 4, time.id = "year", unit.id = "country_id", 
                                  treatment = "dem", refinement.method = "CBPS.match", 
                                  data = vdem, match.missing = TRUE, 
                                  covs.formula = ~ I(lag(v2x_polyarchy, 1:4)) + I(lag(Maddison2020_pop_mean_log, 1:4)) +
                                    I(lag(v2svstterr, 1:4)) + I(lag(v2clrspct, 1:4)) + 
                                    I(lag(gdppc_ppp_bcbt_growth_rate_5avg, 1:4)) +
                                    I(lag(y, 1:4)), 
                                  size.match = 10, qoi = "att" ,outcome.var = "y",
                                  lead = 0:20, forbid.treatment.reversal = FALSE, 
                                  use.diagonal.variance.matrix = TRUE)

######################################################

## Display different matched sets ##
mset <- PM.results_mahalanobis$att[9]

DisplayTreatment(unit.id = "country_id",
                 time.id = "year", legend.position = "none",
                 xlab = "year", ylab = "Country Code",
                 treatment = "dem", data = vdem,
                 matched.set = mset, # this way we highlight the particular set
                 show.set.only = TRUE)


summary(PM.results_mahalanobis$att)
summary(PM.results_psmatch$att)
summary(PM.results_psweight$att)
summary(PM.results_cbpsmatch$att)
summary(PM.results_cbpsweight$att)


par(mfrow=c(1,5)) 
plot(PM.results_mahalanobis$att, main ="Distribution of Matched Set Sizes\n (Mahalanobis Matching)")
plot(PM.results_psmatch$att,main = "Distribution of Matched Set Sizes\n (Propenstiy ScoreMatching)")
plot(PM.results_psweight$att,main = "Distribution of Matched Set Sizes\n (Propenstiy Score Weighting)")
plot(PM.results_cbpsmatch$att,main = "Distribution of Matched Set Sizes\n (Covariate Propensity Score Matching)")
plot(PM.results_cbpsweight$att,main = "Distribution of Matched Set Sizes\n (Covariate Propenstiy Score Weighting)")


dev.copy(pdf,'outputs/Step4/Figure_Frequency_Distribution_log.pdf', width = 10, height = 5)
dev.off()


#### Check Improved Covariate Balance due to Matching over the Pre-Treatment Time ####

## Get covariate balance ##

df_none <- get_covariate_balance(PM.results.none$att,
                                 data = vdem,
                                 covariates = c("v2x_polyarchy", "Maddison2020_pop_mean_log",  "v2svstterr", "v2clrspct",  "gdppc_ppp_bcbt_growth_rate_5avg", "y"),
                                 plot = FALSE)

df_mahalanobis <- get_covariate_balance(PM.results_mahalanobis$att,
                                        data = vdem,
                                        covariates = c("v2x_polyarchy", "Maddison2020_pop_mean_log",  "v2svstterr", "v2clrspct", "gdppc_ppp_bcbt_growth_rate_5avg", "y"),
                                        plot = FALSE)

df_psmatch <- get_covariate_balance(PM.results_psmatch$att,
                                    data = vdem,
                                    covariates = c("v2x_polyarchy", "Maddison2020_pop_mean_log",  "v2svstterr", "v2clrspct",  "gdppc_ppp_bcbt_growth_rate_5avg", "y"),
                                    plot = FALSE)

df_psweight <- get_covariate_balance(PM.results_psweight$att,
                                     data = vdem,
                                     covariates = c("v2x_polyarchy", "Maddison2020_pop_mean_log",  "v2svstterr", "v2clrspct", "gdppc_ppp_bcbt_growth_rate_5avg", "y"),
                                     plot = FALSE)

df_cbpsmatch <- get_covariate_balance(PM.results_cbpsmatch$att,
                                      data = vdem,
                                      covariates = c("v2x_polyarchy", "Maddison2020_pop_mean_log",  "v2svstterr", "v2clrspct",  "gdppc_ppp_bcbt_growth_rate_5avg", "y"),
                                    plot = FALSE)

df_cbpsweight <- get_covariate_balance(PM.results_cbpsweight$att,
                                     data = vdem,
                                     covariates = c("v2x_polyarchy", "Maddison2020_pop_mean_log",  "v2svstterr", "v2clrspct", "gdppc_ppp_bcbt_growth_rate_5avg", "y"),
                                     plot = FALSE)

#Tidying outputs #
df_none <- as.data.frame(df_none)

df_none <- df_none %>%
  rownames_to_column("year") %>%
  pivot_longer(-year, names_to = "covariate", values_to = "sd_covariate") %>%
  mutate(year = case_when(year =="t_4" ~ -4, 
                          year =="t_3" ~ -3,
                          year =="t_2" ~ -2,
                          year =="t_1" ~ -1,
                          year == "t_0" ~ 0))

F1 <- ggplot(df_none, aes(x = year, y = sd_covariate, color = covariate)) +
  geom_line() +
  geom_hline(yintercept=0, color = "black") +
  labs(x = "", y = "", 
       title = "Before Refinement") +
  ylim(-2, 5) +
  theme(legend.position = "none", 
        plot.title = element_text(hjust = 0.5)) +
  theme_pubr()

df_mahalanobis <- as.data.frame(df_mahalanobis)

df_mahalanobis <- df_mahalanobis %>%
  rownames_to_column("year") %>%
  pivot_longer(-year, names_to = "covariate", values_to = "sd_covariate") %>%
  mutate(year = case_when(year =="t_4" ~ -4, 
                          year =="t_3" ~ -3,
                          year =="t_2" ~ -2,
                          year =="t_1" ~ -1,
                          year == "t_0" ~ 0))

F2 <- ggplot(df_mahalanobis, aes(x = year, y = sd_covariate, color = covariate)) +
  geom_line() +
  geom_hline(yintercept=0, color = "black") +
  labs(x = "", y = "", 
       title = "Mahanlanobis Distance\n Matching") +
  ylim(-2, 5) +
  theme(legend.position = "none", 
        plot.title = element_text(hjust = 0.5)) +
  theme_pubr()

df_psmatch <- as.data.frame(df_psmatch)

df_psmatch <- df_psmatch %>%
  rownames_to_column("year") %>%
  pivot_longer(-year, names_to = "covariate", values_to = "sd_covariate") %>%
  mutate(year = case_when(year =="t_4" ~ -4, 
                          year =="t_3" ~ -3,
                          year =="t_2" ~ -2,
                          year =="t_1" ~ -1,
                          year == "t_0" ~ 0))

F3 <- ggplot(df_psmatch, aes(x = year, y = sd_covariate, color = covariate)) +
  geom_line() +
  geom_hline(yintercept=0, color = "black") +
  labs(x = "", y = "", 
       title = "Propensity Score\n Matching") +
  ylim(-2, 5) +
  theme(legend.position = "none", 
        plot.title = element_text(hjust = 0.5)) +
  theme_pubr()


df_psweight <- as.data.frame(df_psweight)

df_psweight <- df_psweight %>%
  rownames_to_column("year") %>%
  pivot_longer(-year, names_to = "covariate", values_to = "sd_covariate") %>%
  mutate(year = case_when(year =="t_4" ~ -4, 
                          year =="t_3" ~ -3,
                          year =="t_2" ~ -2,
                          year =="t_1" ~ -1,
                          year == "t_0" ~ 0))

F4 <- ggplot(df_psweight, aes(x = year, y = sd_covariate, color = covariate)) +
  geom_line() +
  geom_hline(yintercept=0, color = "black") +
  labs(x = "", y = "", 
       title = "Propensity Score\n Weighting") +
  ylim(-2, 5) +
  theme(legend.position = "none", 
        plot.title = element_text(hjust = 0.5)) +
  theme_pubr()


df_cbpsmatch <- as.data.frame(df_cbpsmatch)

df_cbpsmatch <- df_cbpsmatch %>%
  rownames_to_column("year") %>%
  pivot_longer(-year, names_to = "covariate", values_to = "sd_covariate") %>%
  mutate(year = case_when(year =="t_4" ~ -4, 
                          year =="t_3" ~ -3,
                          year =="t_2" ~ -2,
                          year =="t_1" ~ -1,
                          year == "t_0" ~ 0))

F5 <- ggplot(df_cbpsmatch, aes(x = year, y = sd_covariate, color = covariate)) +
  geom_line() +
  geom_hline(yintercept=0, color = "black") +
  labs(x = "", y = "", 
       title = "Covariate Balancing Propensity Score\n Matching") +
  ylim(-2, 5) +
  theme(legend.position = "none", 
        plot.title = element_text(hjust = 0.5)) +
  theme_pubr()



df_cbpsweight <- as.data.frame(df_cbpsweight)

df_cbpsweight <- df_cbpsweight %>%
  rownames_to_column("year") %>%
  pivot_longer(-year, names_to = "covariate", values_to = "sd_covariate") %>%
  mutate(year = case_when(year =="t_4" ~ -4, 
                          year =="t_3" ~ -3,
                          year =="t_2" ~ -2,
                          year =="t_1" ~ -1,
                          year == "t_0" ~ 0))

F6 <- ggplot(df_cbpsweight, aes(x = year, y = sd_covariate, color = covariate)) +
  geom_line() +
  geom_hline(yintercept=0, color = "black") +
  labs(x = "", y = "", 
       title = "Covariate Balanacing Propensity Score\n Weighting") +
  ylim(-2, 5) +
  theme(legend.position = "none", 
        plot.title = element_text(hjust = 0.5)) +
  theme_pubr()


Figure1 <- ggarrange(F1, F2, F3, F4, F5, F6, 
                     ncol = 3, nrow = 2, 
                     common.legend = TRUE)

annotate_figure(Figure1, 
                left = text_grob("Standardized Mean Differences\n for Democratization", rot = 90))
ggsave("outputs/Step4/Figure_Covariate_Balance_Autocratization.pdf", height = 15, width = 35, units= c("cm"))


#### Improved Covariate Balance due to the Refinement of Matched Sets. 

par(mfrow=c(2,3)) 
balance_scatter(non_refined_set = PM.results.none$att,
                refined_list = list(PM.results_mahalanobis$att),
                data = vdem,
                main = "Mahalanobis",
                covariates = c("v2x_polyarchy", "Maddison2020_pop_mean_log",  "v2svstterr", "v2clrspct", "gdppc_ppp_bcbt_growth_rate_5avg", "y"))
balance_scatter(non_refined_set = PM.results.none$att,
                refined_list = list(PM.results_psmatch$att),
                data = vdem,
                main = "PS Matching",
                covariates = c("v2x_polyarchy", "Maddison2020_pop_mean_log",  "v2svstterr", "v2clrspct",  "gdppc_ppp_bcbt_growth_rate_5avg",  "y"))
balance_scatter(non_refined_set = PM.results.none$att,
                refined_list = list(PM.results_psweight$att),
                data = vdem,
                main = "PS Weighting",
                covariates = c("v2x_polyarchy", "Maddison2020_pop_mean_log",  "v2svstterr", "v2clrspct", "gdppc_ppp_bcbt_growth_rate_5avg", "y"))
balance_scatter(non_refined_set = PM.results.none$att,
                refined_list = list(PM.results_cbpsmatch$att),
                data = vdem,
                main = "CBPS Matching",
                covariates = c("v2x_polyarchy", "Maddison2020_pop_mean_log",  "v2svstterr", "v2clrspct",  "gdppc_ppp_bcbt_growth_rate_5avg", "y"))
balance_scatter(non_refined_set = PM.results.none$att,
                refined_list = list(PM.results_cbpsweight$att),
                data = vdem,
                main = "CBPS Weighting",
                covariates = c("v2x_polyarchy", "Maddison2020_pop_mean_log",  "v2svstterr", "v2clrspct",  "gdppc_ppp_bcbt_growth_rate_5avg", "y"))

dev.copy(pdf,'outputs/Step4/Figure_Improved_Covariate_Balance.pdf', width = 10, height = 5)
dev.off()


#### Empirical Findings of Democratization Treatment ####

## Model 1 ##
M1 <- PanelEstimate(sets = PM.results_mahalanobis, data = vdem)
summary(M1)

par(mfrow=c(1,1))
plot(M1)

## Model 2 ##
M2 <- PanelEstimate(sets = PM.results_psmatch, data = vdem)
summary(M2)

## Model 3 ##
M3 <- PanelEstimate(sets = PM.results_psweight, data = vdem)
summary(M3)

## Model 4 ##
M4 <- PanelEstimate(sets = PM.results_cbpsweight, data = vdem)
summary(M4)

## Model 5 ##
M5 <- PanelEstimate(sets = PM.results_cbpsmatch, data = vdem)
summary(M5)



#### Plot the results
par(mfrow=c(2,3)) 
plot(M1, main = "Mahalanobis Matchting", ylab= "Estimated Effect of Democratization", xlab = "", ylim = c(-5, 50))
plot(M2, main = "Propensity Score Matching", ylab= "", xlab = "", ylim =  c(-5, 50))
plot(M3, main = "Propensity Score Weighting", ylab= "", xlab = "", ylim =  c(-5, 50))
plot(M4, main = "CBPS Weighting", ylab= "Estimated Effect of Democratization", xlab = "", ylim =  c(-5, 50))
plot(M5, main = "CBPS Matching", ylab= "", xlab = "", ylim =  c(-5, 50))

dev.copy(pdf,'outputs/Step4/A_Figures_Democratization_Estimates_log.pdf', width = 10, height =6)
dev.off()

## Plotting the results in Paper Design ##

M1_df <- data.frame(M1$estimates, M1$standard.error, M1$lead)

M1_df$min95 <-   M1$estimates- 1.96*M1$standard.error
M1_df$max95 <-   M1$estimates+ 1.96*M1$standard.error

ggplot(data = M1_df, aes(x = M1.lead)) +
  geom_ribbon(aes(ymin=min95, ymax = max95), fill = "#495e35", alpha = 0.5) +
  geom_line(aes(y=min95), color = "#999999", linetype = "dotdash", size = 1.05) +
  geom_line(aes(y=max95), linetype = "dotdash", color = "#999999", size = 1.05) +
  geom_line(aes(y=M1.estimates), size = 1.3, color = "black") +
  geom_hline(aes(yintercept = 0), color = "red", linetype = "dashed", alpha = 0.5) +
  theme_pubr()+
  scale_x_continuous(breaks=seq(0,20,1)) +
  scale_y_continuous(breaks= seq(-10,30,5), limits = c(-10, 25))+
  labs(x = "Year around democratization", 
       y = "Estimated Effect of Democratization") +
  theme(axis.text=element_text(size=14),
        axis.title=element_text(size=16,face="bold"))


ggsave("outputs/Step4/Figure_4.pdf", units= c("cm"), width = 40, height = 20, dpi = 900)
ggsave("outputs/Step4/Figure_4.eps", units= c("cm"), width = 40, height = 20, dpi = 900)




